import os
import re
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
data_folder = "data/md_traj_analysis/"
files = glob.glob(data_folder+'*rmsd_C-alpha.xvg')
clean_line = lambda line: list(map(float, line.strip().split()))
exclude_regex = lambda line: not re.search("#|@",line)
fig,ax = plt.subplots(1,1,dpi=200)
for path in files:
data = [clean_line(l) for l in open(path,'r').readlines() if exclude_regex(l)]
data = np.array(data)
ax.plot(*data.T,color="k",lw=0.5,alpha=0.25)
ax.plot([],[],color="k",label="all models")
ax.set_xlabel("time (ps)", fontsize=15)
ax.set_ylabel("$C_\\alpha$-RMSD", fontsize=15)
ax.set_xlim(0,100000)
ax.legend(loc="best",fontsize=10)
plt.show()
data_folder = "data/md_traj_analysis/"
clean_line = lambda line: list(map(float, line.strip().split()))
exclude_regex = lambda line: not re.search("#|@",line)
mutant_names = ['cWza', 'cWza-K375C', 'cWza-S355C', 'cWza-Y373C']
fig, ax = plt.subplots(2,2,figsize=(10,6),dpi=200, sharex=True, sharey=True)
Axes = {
'cWza': ax[0][0],
'cWza-K375C': ax[0][1],
'cWza-S355C': ax[1][0],
'cWza-Y373C': ax[1][1]
}
Conformations = {
'cWza': [0, 1],
'cWza-K375C': [0, 1],
'cWza-S355C': [0, 1],
'cWza-Y373C': [1]
}
Colors = {0:"blue", 1:'green'}
for mutant in mutant_names:
ax = Axes[mutant]
for g in Conformations[mutant]:
files=glob.glob(data_folder+mutant+'_'+'conformation'+str(g)+'_'+'*rmsd_C-alpha.xvg')
for path in files:
data = [clean_line(l) for l in open(
path, 'r').readlines() if exclude_regex(l)]
data = np.array(data)
ax.plot(*data.T, color=Colors[g], lw=0.25, alpha=0.5)
ax.set_title(mutant)
ax.set_xlabel("time (ps)", fontsize=10)
ax.set_ylabel("$C_\\alpha$-RMSD", fontsize=10)
ax.set_xlim(0, 100000)
fig.tight_layout()
plt.show()
data_folder = "data/md_traj_analysis/"
clean_line = lambda line: list(map(float, line.strip().split()))
exclude_regex = lambda line: not re.search("#|@",line)
mutant_names = ['cWza', 'cWza-K375C', 'cWza-S355C', 'cWza-Y373C']
fig, ax = plt.subplots(2,2,figsize=(7,6),dpi=200, sharex=True, sharey=True)
Axes = {
'cWza': ax[0][0],
'cWza-K375C': ax[0][1],
'cWza-S355C': ax[1][0],
'cWza-Y373C': ax[1][1]
}
Conformations = {
'cWza': [0, 1],
'cWza-K375C': [0, 1],
'cWza-S355C': [0, 1],
'cWza-Y373C': [1]
}
Colors = {0:"blue", 1:'green'}
time_threshold_ps = 90000 # picoseconds
for mutant in mutant_names:
ax = Axes[mutant]
for g in Conformations[mutant]:
data_last_10ns = []
files=glob.glob(data_folder+mutant+'_'+'conformation'+str(g)+'_'+'*rmsd_C-alpha.xvg')
for path in files:
data = [clean_line(l) for l in open(
path, 'r').readlines() if exclude_regex(l)]
data = np.array(data)
time, value = data.T
data_last_10ns = data_last_10ns + list(data[time > time_threshold_ps].T[-1])
sns.histplot(data=data_last_10ns, stat="density", kde=True, color=Colors[g], alpha=0.25, ax=ax)
ax.set_title(mutant)
ax.set_xlabel("$C_\\alpha$-RMSD", fontsize=10)
ax.set_ylabel("Density", fontsize=10)
fig.tight_layout()
plt.show()
fig, ax = plt.subplots(2,2,figsize=(7,6),dpi=200, sharex=True, sharey=True)
Axes = {
'cWza': ax[0][0],
'cWza-K375C': ax[0][1],
'cWza-S355C': ax[1][0],
'cWza-Y373C': ax[1][1]
}
Conformations = {
'cWza': [0, 1],
'cWza-K375C': [0, 1],
'cWza-S355C': [0, 1],
'cWza-Y373C': [1]
}
Colors = {0:"blue", 1:'green'}
time_threshold = 90000 # picosecs
for mutant in mutant_names:
ax = Axes[mutant]
for g in Conformations[mutant]:
data_last_10ns = []
files=glob.glob(data_folder+mutant+'_'+'conformation'+str(g)+'_'+'*rmsd_C-alpha.xvg')
for path in files:
data = [clean_line(l) for l in open(
path, 'r').readlines() if exclude_regex(l)]
data = np.array(data)
time, value = data.T
if len(data[time > 90000].T[-1]) > 0:
data_last_10ns.append( np.mean(data[time > time_threshold].T[-1]) )
sns.histplot(data=data_last_10ns, stat="density", color=Colors[g], alpha=0.25, ax=ax)
ax.set_title(mutant)
ax.set_xlabel("$C_\\alpha$-RMSD", fontsize=10)
ax.set_ylabel("Density", fontsize=10)
fig.tight_layout()
plt.show()
TOP_LOWEST_RMSD_MODELS = {
'cWza': {0:[], 1:[]},
'cWza-K375C': {0:[], 1:[]},
'cWza-S355C': {0:[], 1:[]},
'cWza-Y373C': {1:[]}
}
N_models = 10
Conformations = {
'cWza': [0, 1],
'cWza-K375C': [0, 1],
'cWza-S355C': [0, 1],
'cWza-Y373C': [1]
}
datafile_suffix = '_md_100ns_rmsd_C-alpha.xvg'
time_threshold = 90000 # pico-secs
for mutant in mutant_names:
TOP_LOWEST_RMSD_MODELS[mutant] = {}
for group in Conformations[mutant]:
data_output = []
files=glob.glob(data_folder+mutant+'_'+'conformation'+str(group)+'_*'+datafile_suffix)
for path in files:
data = [clean_line(l) for l in open(
path, 'r').readlines() if exclude_regex(l)]
data = np.array(data)
time, value = data.T
rmsd_data = data[time > time_threshold].T[-1]
model_labels = path.split('/')[-1].split(datafile_suffix)[0]
if len(rmsd_data) > 0: # check for empty dataset
data_output.append([np.mean(rmsd_data), model_labels])
#sort data by RMSD (low to high)
TOP_LOWEST_RMSD_MODELS[mutant][group] = sorted(data_output, key=lambda x: x[0])[:N_models]
Print out data
import pandas as pd
pd.set_option("display.max_colwidth", None)
pd.set_option('display.colheader_justify', 'center')
model_lowest_rmsd = []
for mutant in mutant_names:
for group in Conformations[mutant]:
for item in TOP_LOWEST_RMSD_MODELS[mutant][group]:
model_lowest_rmsd.append(item)
df = pd.DataFrame(model_lowest_rmsd, columns=['Mean RMSD (90-100ns)[nm]', 'Model tags'])
df.style.set_properties(**{'text-align': 'center'})
| Mean RMSD (90-100ns)[nm] | Model tags | |
|---|---|---|
| 0 | 0.334172 | cWza_conformation0_refined1_0001_INPUT_0231_ignorechain |
| 1 | 0.345286 | cWza_conformation0_refined1_0001_INPUT_0227_ignorechain |
| 2 | 0.353893 | cWza_conformation0_refined1_0001_INPUT_0742_ignorechain |
| 3 | 0.357059 | cWza_conformation0_refined1_0001_INPUT_0843_ignorechain |
| 4 | 0.359771 | cWza_conformation0_refined1_0001_INPUT_0736_ignorechain |
| 5 | 0.362919 | cWza_conformation0_refined1_0001_INPUT_0553_ignorechain |
| 6 | 0.366797 | cWza_conformation0_refined1_0001_INPUT_0813_ignorechain |
| 7 | 0.368353 | cWza_conformation0_refined1_0001_INPUT_0595_ignorechain |
| 8 | 0.379361 | cWza_conformation0_refined1_0001_INPUT_0188_ignorechain |
| 9 | 0.382846 | cWza_conformation0_refined1_0001_INPUT_0433_ignorechain |
| 10 | 0.210962 | cWza_conformation1_refined1_0001_INPUT_0198_ignorechain |
| 11 | 0.212629 | cWza_conformation1_refined1_0001_INPUT_0291_ignorechain |
| 12 | 0.224956 | cWza_conformation1_refined1_0001_INPUT_0058_ignorechain |
| 13 | 0.225439 | cWza_conformation1_refined1_0001_INPUT_0173_ignorechain |
| 14 | 0.237867 | cWza_conformation1_refined1_0001_INPUT_0731_ignorechain |
| 15 | 0.249312 | cWza_conformation1_refined1_0001_INPUT_0488_ignorechain |
| 16 | 0.273101 | cWza_conformation1_refined1_0001_INPUT_0247_ignorechain |
| 17 | 0.318953 | cWza_conformation1_refined1_0001_INPUT_0324_ignorechain |
| 18 | 0.325123 | cWza_conformation1_refined1_0001_INPUT_0271_ignorechain |
| 19 | 0.339578 | cWza_conformation1_refined1_0001_INPUT_0560_ignorechain |
| 20 | 0.313111 | cWza-K375C_conformation0_refined1_0001_INPUT_0645_ignorechain |
| 21 | 0.324444 | cWza-K375C_conformation0_refined1_0001_INPUT_0023_ignorechain |
| 22 | 0.325506 | cWza-K375C_conformation0_refined1_0001_INPUT_0301_ignorechain |
| 23 | 0.329557 | cWza-K375C_conformation0_refined1_0001_INPUT_0875_ignorechain |
| 24 | 0.332487 | cWza-K375C_conformation0_refined1_0001_INPUT_0874_ignorechain |
| 25 | 0.343934 | cWza-K375C_conformation0_refined1_0001_INPUT_0716_ignorechain |
| 26 | 0.360332 | cWza-K375C_conformation0_refined1_0001_INPUT_0806_ignorechain |
| 27 | 0.361159 | cWza-K375C_conformation0_refined1_0001_INPUT_0243_ignorechain |
| 28 | 0.379658 | cWza-K375C_conformation0_refined1_0001_INPUT_0340_ignorechain |
| 29 | 0.414391 | cWza-K375C_conformation0_refined1_0001_INPUT_0394_ignorechain |
| 30 | 0.329132 | cWza-K375C_conformation1_refined1_0001_INPUT_0448_ignorechain |
| 31 | 0.352215 | cWza-K375C_conformation1_refined1_0001_INPUT_0121_ignorechain |
| 32 | 0.398857 | cWza-K375C_conformation1_refined1_0001_INPUT_0627_ignorechain |
| 33 | 0.425140 | cWza-K375C_conformation1_refined1_0001_INPUT_0747_ignorechain |
| 34 | 0.394105 | cWza-S355C_conformation0_refined1_0001_INPUT_0273_ignorechain |
| 35 | 0.395606 | cWza-S355C_conformation0_refined1_0001_INPUT_0193_ignorechain |
| 36 | 0.396527 | cWza-S355C_conformation0_refined1_0001_INPUT_0387_ignorechain |
| 37 | 0.402939 | cWza-S355C_conformation0_refined1_0001_INPUT_0617_ignorechain |
| 38 | 0.402999 | cWza-S355C_conformation0_refined1_0001_INPUT_0722_ignorechain |
| 39 | 0.404175 | cWza-S355C_conformation0_refined1_0001_INPUT_0257_ignorechain |
| 40 | 0.417472 | cWza-S355C_conformation0_refined1_0001_INPUT_0877_ignorechain |
| 41 | 0.418410 | cWza-S355C_conformation0_refined1_0001_INPUT_0140_ignorechain |
| 42 | 0.431690 | cWza-S355C_conformation0_refined1_0001_INPUT_0771_ignorechain |
| 43 | 0.444226 | cWza-S355C_conformation0_refined1_0001_INPUT_0076_ignorechain |
| 44 | 0.295457 | cWza-S355C_conformation1_refined1_0001_INPUT_0050_ignorechain |
| 45 | 0.348642 | cWza-S355C_conformation1_refined1_0001_INPUT_0407_ignorechain |
| 46 | 0.355704 | cWza-S355C_conformation1_refined1_0001_INPUT_0833_ignorechain |
| 47 | 0.360430 | cWza-S355C_conformation1_refined1_0001_INPUT_0113_ignorechain |
| 48 | 0.364094 | cWza-S355C_conformation1_refined1_0001_INPUT_0579_ignorechain |
| 49 | 0.367821 | cWza-S355C_conformation1_refined1_0001_INPUT_0417_ignorechain |
| 50 | 0.374344 | cWza-S355C_conformation1_refined1_0001_INPUT_0470_ignorechain |
| 51 | 0.378966 | cWza-S355C_conformation1_refined1_0001_INPUT_0682_ignorechain |
| 52 | 0.381657 | cWza-S355C_conformation1_refined1_0001_INPUT_0301_ignorechain |
| 53 | 0.382152 | cWza-S355C_conformation1_refined1_0001_INPUT_0295_ignorechain |
| 54 | 0.306335 | cWza-Y373C_conformation1_refined1_0001_INPUT_0337_ignorechain |
| 55 | 0.343982 | cWza-Y373C_conformation1_refined1_0001_INPUT_0560_ignorechain |
| 56 | 0.354060 | cWza-Y373C_conformation1_refined1_0001_INPUT_0325_ignorechain |
| 57 | 0.367888 | cWza-Y373C_conformation1_refined1_0001_INPUT_0213_ignorechain |
| 58 | 0.371924 | cWza-Y373C_conformation1_refined1_0001_INPUT_0995_ignorechain |
| 59 | 0.376451 | cWza-Y373C_conformation1_refined1_0001_INPUT_0826_ignorechain |
| 60 | 0.378569 | cWza-Y373C_conformation1_refined1_0001_INPUT_0229_ignorechain |
| 61 | 0.382964 | cWza-Y373C_conformation1_refined1_0001_INPUT_0209_ignorechain |
| 62 | 0.393463 | cWza-Y373C_conformation1_refined1_0001_INPUT_0504_ignorechain |
| 63 | 0.401435 | cWza-Y373C_conformation1_refined1_0001_INPUT_0646_ignorechain |
fig, ax = plt.subplots(2,2,figsize=(10,6),dpi=200, sharex=True, sharey=True)
Axes = {
'cWza': ax[0][0],
'cWza-K375C': ax[0][1],
'cWza-S355C': ax[1][0],
'cWza-Y373C': ax[1][1]
}
Conformations = {
'cWza': [0, 1],
'cWza-K375C': [0, 1],
'cWza-S355C': [0, 1],
'cWza-Y373C': [1]
}
Colors = {0:"blue", 1:'green'}
datafile_suffix = '_md_100ns_rmsd_C-alpha.xvg'
for mutant in mutant_names:
ax = Axes[mutant]
for group in Conformations[mutant]:
data_models = TOP_LOWEST_RMSD_MODELS[mutant][group]
for i in range(len(data_models)):
model_tags = data_models[i][-1]
path = data_folder+model_tags+datafile_suffix
rmsd_data = [clean_line(l) for l in open(
path, 'r').readlines() if exclude_regex(l)]
rmsd_data = np.array(rmsd_data)
ax.plot(*rmsd_data.T, color=Colors[group], lw=0.25, alpha=0.5)
ax.set_title(mutant)
ax.set_xlabel("time (ps)", fontsize=10)
ax.set_ylabel("$C_\\alpha$-RMSD", fontsize=10)
ax.set_xlim(0, 100000)
ax.set_ylim(0, 0.7)
suptitle = "Top "+str(N_models)+" lowest-RMSD models"
fig.suptitle(suptitle)
fig.tight_layout()
plt.show()
import pandas as pd
pd.set_option("display.max_colwidth", None)
pd.set_option('display.colheader_justify', 'center')
model_lowest_rmsd = []
for mutant in mutant_names:
for group in Conformations[mutant]:
model_lowest_rmsd.append(TOP_LOWEST_RMSD_MODELS[mutant][group][0])
df = pd.DataFrame(model_lowest_rmsd, columns=['Mean RMSD (90-100ns)[nm]', 'Model tags'])
df.style.set_properties(**{'text-align': 'center'})
| Mean RMSD (90-100ns)[nm] | Model tags | |
|---|---|---|
| 0 | 0.334172 | cWza_conformation0_refined1_0001_INPUT_0231_ignorechain |
| 1 | 0.210962 | cWza_conformation1_refined1_0001_INPUT_0198_ignorechain |
| 2 | 0.313111 | cWza-K375C_conformation0_refined1_0001_INPUT_0645_ignorechain |
| 3 | 0.329132 | cWza-K375C_conformation1_refined1_0001_INPUT_0448_ignorechain |
| 4 | 0.394105 | cWza-S355C_conformation0_refined1_0001_INPUT_0273_ignorechain |
| 5 | 0.295457 | cWza-S355C_conformation1_refined1_0001_INPUT_0050_ignorechain |
| 6 | 0.306335 | cWza-Y373C_conformation1_refined1_0001_INPUT_0337_ignorechain |
Plot their RMSD
fig, ax = plt.subplots(2,2,figsize=(10,6),dpi=200, sharex=True, sharey=True)
Axes = {
'cWza': ax[0][0],
'cWza-K375C': ax[0][1],
'cWza-S355C': ax[1][0],
'cWza-Y373C': ax[1][1]
}
Conformations = {
'cWza': [0, 1],
'cWza-K375C': [0, 1],
'cWza-S355C': [0, 1],
'cWza-Y373C': [1]
}
Colors = {0:"blue", 1:'green'}
datafile_suffix = '_md_100ns_rmsd_C-alpha.xvg'
for mutant in mutant_names:
ax = Axes[mutant]
for group in Conformations[mutant]:
data_models = TOP_LOWEST_RMSD_MODELS[mutant][group]
model_tags = data_models[0][-1] #data lowest-RMSD model
path = data_folder+model_tags+datafile_suffix
rmsd_data = [clean_line(l) for l in open(
path, 'r').readlines() if exclude_regex(l)]
rmsd_data = np.array(rmsd_data)
zorder = -int(np.mean(rmsd_data.T[-1])*100)
color = Colors[group]
ax.fill_between(*rmsd_data.T, color=color, lw=1, alpha=0.35, zorder=zorder)
# ax.plot(*rmsd_data.T, color=color, lw=1, label=model_tags, alpha=0.75)
ax.plot(*rmsd_data.T, color=color, lw=1, label='conformation '+str(group), alpha=0.75)
# ax.set_title("Lowest-RMSD models")
# ax.set_xlabel("time (ps)", fontsize=15)
# ax.set_ylabel("$C_\\alpha$-RMSD", fontsize=15)
ax.set_title(mutant,fontsize=20)
ax.set_xlabel("time (ps)", fontsize=12)
ax.set_ylabel("$C_\\alpha$-RMSD", fontsize=12)
ax.set_xlim(0, 100000)
ax.set_ylim(0, 0.7)
ax.legend(loc="best")
fig.suptitle("Lowest $C_{\\alpha}$-RMSD (90-100ns) models",fontsize=15)
fig.tight_layout()
plt.show()
fig, ax = plt.subplots(1,1,figsize=(10,6),dpi=200)
datafile_suffix = '_md_100ns_rmsd_C-alpha.xvg'
for mutant in mutant_names:
for group in Conformations[mutant]:
data_models = TOP_LOWEST_RMSD_MODELS[mutant][group]
model_tags = data_models[0][-1] #data lowest-RMSD model
path = data_folder+model_tags+datafile_suffix
rmsd_data = [clean_line(l) for l in open(
path, 'r').readlines() if exclude_regex(l)]
rmsd_data = np.array(rmsd_data)
zorder = -int(np.mean(rmsd_data.T[-1])*100)
ax.fill_between(*rmsd_data.T, lw=1, alpha=0.35, zorder=zorder)
ax.plot(*rmsd_data.T, lw=1, label=model_tags, alpha=0.75)
ax.set_title("Lowest-RMSD models")
ax.set_xlabel("time (ps)", fontsize=15)
ax.set_ylabel("$C_\\alpha$-RMSD", fontsize=15)
ax.set_xlim(0, 100000)
ax.set_ylim(0, 0.7)
ax.legend(loc="best")
fig.tight_layout()
plt.show()
Slides featuring screenshots of the RMSD-selected models with respect to their original docking models and a model of wild-type Wza-D4 can be found here
models_paths = [
"cWza/conformation0/refined1_0001_INPUT_0231_ignorechain/",
"cWza/conformation1/refined1_0001_INPUT_0198_ignorechain/",
"cWza-K375C/conformation0/refined1_0001_INPUT_0645_ignorechain/",
"cWza-K375C/conformation1/refined1_0001_INPUT_0448_ignorechain/",
"cWza-S355C/conformation0/refined1_0001_INPUT_0273_ignorechain/",
"cWza-S355C/conformation1/refined1_0001_INPUT_0050_ignorechain/",
"cWza-Y373C/conformation1/refined1_0001_INPUT_0337_ignorechain/"
]
infile = "data/md_selected_models/HOLE_conductance_data_selected_models.txt"
Gmacro_data = []
data = []
for l in open(infile, 'r').readlines():
if "&" not in l:
G = l.split()[7] # Gpred Rmin
#G = l.split()[5] # Gmacro
data.append(0.001*float(G))
else:
Gmacro_data.append(data)
next
data = []
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
fig = plt.figure(dpi=200)
cm = mpl.cm.get_cmap('Set2')
t_initial = 0
t_final = 100
n_frames = 1001
timeline = np.linspace(t_initial,t_final,n_frames)
for i in range(len(Gmacro_data)):
dataset = np.array(Gmacro_data[i]).T
label = models_paths[i]
plt.plot(timeline,dataset,color=cm(i),lw=0.3,alpha=0.5)
plt.scatter(timeline,dataset, s=5,color='white',edgecolor=cm(i),label=label,alpha=0.75)
# H-state cWza model (crystal structure)
Gpred_cWza_Hstate = [1.2]
plt.plot((0,100),2*Gpred_cWza_Hstate,lw=1.5,linestyle='--',color='violet',alpha=1,zorder=10)
plt.text(80,1.33,'H-state (NChem 2017)',rotation=0,fontsize=5,
bbox=dict(facecolor='white',edgecolor='violet',alpha=0.5, boxstyle='round, pad=0.5'))
#L-state cWza model (CCBuilder and BUDE optimised)
Gpred_cWza_Lstate = [0.4]
plt.plot((0,100),2*Gpred_cWza_Lstate,lw=1.5,linestyle='--',color='red',alpha=1,zorder=10)
plt.text(80,0.2,'L-state (NChem 2017)',rotation=0,fontsize=5,
bbox=dict(facecolor='white',edgecolor='red',alpha=0.5,boxstyle='round, pad=0.5'))
# customise plot
plt.title("Lowest MD $C_{\\alpha}$-RMSD models")
plt.legend(loc='best', fontsize=5)
plt.xlabel("time (ns)", fontsize=15)
plt.ylabel("$G_{pred}$ (nS)", fontsize=15)
plt.xlim(0,100)
plt.ylim(0,5)
plt.grid(True)
plt.show()
HISTORGRAM
df = pd.DataFrame(columns=['G','model'])
for i in range(len(Gmacro_data)):
dataset = np.array(Gmacro_data[i]).T
df_per_model = pd.DataFrame(np.column_stack([dataset,np.tile([i],len(dataset))]), columns=['G','model'])
df = df.append(df_per_model, ignore_index=True)
fig,ax = plt.subplots(1,1,figsize=(6,5),dpi=200)
histograms = sns.histplot(
data=df[df.G < 3],
x='G',
hue='model',
stat='density',
kde=True,
palette='Set2',
element="poly",
legend=False,
alpha=0.45,
ax=ax
)
labels = models_paths
labels.reverse()
histograms.legend(fontsize=5,facecolor='white',labels=labels)
ax.set_title("Lowest MD $C_{\\alpha}$-RMSD models")
ax.set_xlabel("$G_{pred}$ (nS)", fontsize=15)
ax.set_ylabel('probability density', fontsize=15)
ax.set_xlim(0,3)
ax.set_ylim(0,0.75)
ylims = ax.get_ylim()
# H-state cWza model (crystal structure)
Gpred_cWza_Hstate = [1.2]
ax.plot(2*Gpred_cWza_Hstate,ylims,lw=1.5,linestyle='--',color='violet',alpha=1,zorder=10)
ax.text(1.26,0.4,'H-state (NChem 2017)',rotation=90,fontsize=5,
bbox=dict(facecolor='white',edgecolor='violet',alpha=0.5, boxstyle='round, pad=0.5'))
#L-state cWza model (CCBuilder and BUDE optimised)
Gpred_cWza_Lstate = [0.4]
ax.plot(2*Gpred_cWza_Lstate,ylims,lw=1.5,linestyle='--',color='red',alpha=1,zorder=10)
plt.text(0.45,0.4,'L-state (NChem 2017)',rotation=90,fontsize=5,
bbox=dict(facecolor='white',edgecolor='red',alpha=0.5,boxstyle='round, pad=0.5'))
ax.grid(True)
plt.show()
Code lines for protein frame extraction and HOLE execution
paths_list_protein_extraction.txt to directories containing .xtc MD trajectory filescWza/conformation0/refined1_0001_INPUT_0231_ignorechain/
cWza/conformation1/refined1_0001_INPUT_0198_ignorechain/
cWza-K375C/conformation0/refined1_0001_INPUT_0645_ignorechain/
cWza-K375C/conformation1/refined1_0001_INPUT_0448_ignorechain/
cWza-S355C/conformation0/refined1_0001_INPUT_0273_ignorechain/
cWza-S355C/conformation1/refined1_0001_INPUT_0050_ignorechain/
cWza-Y373C/conformation1/refined1_0001_INPUT_0337_ignorechain/
bash commands for using frame extraction Python script to store PDB frames in folder md_100nsfor path in `cat paths_list_protein_extraction.txt`; do
python ~/mpmodeling/tools/protein_frame_extractor.py md_100ns $path;
done
run_hole for every PDB file with prefix Protein(for n in `seq 900 1000`; do run_hole Protein_${n}.pdb ; done) &
Compute distance between residue pairs in trajectory showing conductance transition
NOTE This solve the import issue temporarily only!
import sys
sys.path[7] = ''
sys.path
['/data/dragon000/sanjuan/bristol/cwza/phd-notebooks/Analyses/figures_nbs', '/data/dragon000/sanjuan/anaconda3/bin/python', '/data/dragon000/sanjuan/research/SAbDab/lib/python/ABDB', '/data/dragon000/sanjuan/anaconda3/lib/python38.zip', '/data/dragon000/sanjuan/anaconda3/lib/python3.8', '/data/dragon000/sanjuan/anaconda3/lib/python3.8/lib-dynload', '', '', '/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages', '/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/ldds-0.1.0-py3.8.egg', '/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/locket-0.2.1-py3.8.egg', '/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/IPython/extensions', '/homes/sanjuan/.ipython']
import MDAnalysis as mda
from MDAnalysis.tests.datafiles import PDB_small
from MDAnalysis.analysis import distances
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
u = mda.Universe(PDB_small)
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/topology/PDBParser.py:330: UserWarning: Element information is absent or missing for a few atoms. Elements attributes will not be populated.
warnings.warn("Element information is absent or missing for a few "
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/topology/base.py:203: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
residx = np.zeros_like(criteria[0], dtype=np.int)
LID_ca = u.select_atoms('name CA and resid 122-159')
NMP_ca = u.select_atoms('name CA and resid 30-59')
n_LID = len(LID_ca)
n_NMP = len(NMP_ca)
print('LID has {} residues and NMP has {} residues'.format(n_LID, n_NMP))
LID has 38 residues and NMP has 30 residues
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/core/selection.py:690: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations mask = np.zeros(len(vals), dtype=np.bool)
dist_arr = distances.distance_array(LID_ca.positions, # reference
NMP_ca.positions, # configuration
box=u.dimensions)
dist_arr.shape
(38, 30)
fig, ax = plt.subplots()
im = ax.imshow(dist_arr, origin='upper')
# add residue ID labels to axes
tick_interval = 5
ax.set_yticks(np.arange(n_LID)[::tick_interval])
ax.set_xticks(np.arange(n_NMP)[::tick_interval])
ax.set_yticklabels(LID_ca.resids[::tick_interval])
ax.set_xticklabels(NMP_ca.resids[::tick_interval])
# add figure labels and titles
plt.ylabel('LID')
plt.xlabel('NMP')
plt.title('Distance between alpha-carbon')
# colorbar
cbar = fig.colorbar(im)
cbar.ax.set_ylabel('Distance (Angstrom)')
Text(0, 0.5, 'Distance (Angstrom)')
Distances between residues
LID = u.select_atoms('resid 122-159')
NMP = u.select_atoms('resid 30-59')
LID_com = LID.center_of_mass(compound='residues')
NMP_com = NMP.center_of_mass(compound='residues')
n_LID = len(LID_com)
n_NMP = len(NMP_com)
print('LID has {} residues and NMP has {} residues'.format(n_LID, n_NMP))
LID has 38 residues and NMP has 30 residues
res_dist = distances.distance_array(LID_com, NMP_com,
box=u.dimensions)
fig2, ax2 = plt.subplots()
im2 = ax2.imshow(res_dist, origin='upper')
# add residue ID labels to axes
tick_interval = 5
ax2.set_yticks(np.arange(n_LID)[::tick_interval])
ax2.set_xticks(np.arange(n_NMP)[::tick_interval])
ax2.set_yticklabels(LID.residues.resids[::tick_interval])
ax2.set_xticklabels(NMP.residues.resids[::tick_interval])
# add figure labels and titles
plt.ylabel('LID')
plt.xlabel('NMP')
plt.title('Distance between center-of-mass')
# colorbar
cbar2 = fig2.colorbar(im)
cbar2.ax.set_ylabel('Distance (Angstrom)')
/tmp/ipykernel_2441428/3118276872.py:17: MatplotlibDeprecationWarning: Starting from Matplotlib 3.6, colorbar() will steal space from the mappable's axes, rather than from the current axes, to place the colorbar. To silence this warning, explicitly pass the 'ax' argument to colorbar(). cbar2 = fig2.colorbar(im)
Text(0, 0.5, 'Distance (Angstrom)')
import MDAnalysis as mda
workdir = "data/cWza-S355C_conformation1_0050/"
u = mda.Universe(workdir+'md_100ns.tpr', workdir+'md_100ns.xtc',in_memory=True)
chains = "abcdefgh"
k = 0 # chain selection
selection = "resname TYR and segid "+"seg_"+str(k)+"_Protein_chain_"+chains[k].upper()
tyrosines = u.select_atoms(selection, updating=True)
selection = "resname GLU and segid "+"seg_"+str(k)+"_Protein_chain_"+chains[k].upper()
glutamates = u.select_atoms(selection, updating=True)
data = []
for ts in u.trajectory:
tyr_com = tyrosines.center_of_mass()
glu_com = glutamates.center_of_mass()
distance_tyr_glu = np.linalg.norm(tyr_com - glu_com)
data.append(distance_tyr_glu)
fig,ax = plt.subplots(1,1, dpi=100)
ax.plot(data)
plt.show()
Put all together
fig,ax = plt.subplots(1,1,dpi=100)
chains = "abcdefgh"
for k in range(len(chains)):
tyr_selection = "resname TYR and segid "+"seg_"+str(k)+"_Protein_chain_"+chains[k].upper()
glu_selection = "resname GLU and segid "+"seg_"+str(k)+"_Protein_chain_"+chains[k].upper()
tyrosines = u.select_atoms(tyr_selection, updating=True)
glutamates = u.select_atoms(glu_selection, updating=True)
data = []
for ts in u.trajectory:
tyr_com = tyrosines.center_of_mass()
glu_com = glutamates.center_of_mass()
distance_tyr_glu = np.linalg.norm(tyr_com - glu_com)
data.append(distance_tyr_glu)
ax.plot(data, label='chain '+chains[k],lw=0.5)
plt.show()
tyr_selection = "resname TYR"
glu_selection = "resname GLU"
tyrosines = u.select_atoms(tyr_selection, updating=True)
glutamates = u.select_atoms(glu_selection, updating=True)
data = []
for ts in u.trajectory:
tyr_com = tyrosines.center_of_mass()
glu_com = glutamates.center_of_mass()
distance_tyr_glu = tyr_com - glu_com
data.append(distance_tyr_glu)
fig,ax=plt.subplots(1,1,dpi=100)
ax.plot(np.linalg.norm(np.array(data),axis=1),lw=0.5)
plt.show()
fig,ax = plt.subplots(3,1,figsize=(5,10),dpi=100)
for k in range(3):
ax[k].plot(np.array(data).T[k],lw=0.5)
plt.show()
import MDAnalysis as mda
workdir = "data/cWza-S355C_conformation1_0050/"
u = mda.Universe(workdir+'md_100ns.tpr', workdir+'md_100ns.xtc',in_memory=True)
from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis as HBA
hbonds = HBA(universe=u,d_h_a_angle_cutoff=30)
hbonds.hydrogens_sel = hbonds.guess_hydrogens("protein")
hbonds.acceptors_sel = hbonds.guess_acceptors("protein")
hbonds.run()
<MDAnalysis.analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis at 0x7f765976eeb0>
hbonds.count_by_type()
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py:539: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations d = self.u.atoms[self.hbonds[:, 1].astype(np.int)] /data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py:543: DeprecationWarning: `np.str` is a deprecated alias for the builtin `str`. To silence this warning, use `str` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.str_` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations dtype=np.str).T
array([['ARG:opls_300', 'ARG:opls_300', '48314'],
['ARG:opls_300', 'ARG:opls_303', '48338'],
['ARG:opls_300', 'ASP:opls_272', '7179'],
['ARG:opls_300', 'GLN:opls_237', '4'],
['ARG:opls_300', 'GLU:opls_272', '4054'],
['ARG:opls_300', 'HIS:opls_511', '6'],
['ARG:opls_300', 'THR:opls_154', '2632'],
['ARG:opls_300', 'THR:opls_272', '1088'],
['ARG:opls_300', 'TRP:opls_503', '22'],
['ARG:opls_300', 'TYR:opls_167', '287'],
['ARG:opls_303', 'ARG:opls_300', '24773'],
['ARG:opls_303', 'GLN:opls_237', '3'],
['ARG:opls_303', 'GLU:opls_272', '1254'],
['ARG:opls_303', 'THR:opls_154', '199'],
['ARG:opls_303', 'THR:opls_272', '124'],
['ARG:opls_303', 'TYR:opls_167', '9'],
['ASN:opls_237', 'GLN:opls_237', '18'],
['ASN:opls_237', 'THR:opls_154', '28'],
['ASP:opls_238', 'ASP:opls_272', '40'],
['GLN:opls_237', 'ARG:opls_300', '4'],
['GLN:opls_237', 'ARG:opls_303', '6'],
['GLN:opls_237', 'ASN:opls_237', '18'],
['GLN:opls_238', 'GLN:opls_237', '2'],
['GLU:opls_238', 'THR:opls_154', '51'],
['GLY:opls_238', 'THR:opls_154', '2'],
['HIS:opls_503', 'ARG:opls_300', '3'],
['HIS:opls_503', 'ASP:opls_272', '3403'],
['HIS:opls_503', 'THR:opls_154', '10'],
['ILE:opls_238', 'THR:opls_154', '760'],
['LEU:opls_238', 'ASN:opls_237', '1'],
['LEU:opls_238', 'ASP:opls_272', '1'],
['LYS:opls_238', 'THR:opls_154', '4'],
['LYS:opls_287', 'GLU:opls_272', '1144'],
['LYS:opls_287', 'THR:opls_154', '62'],
['LYS:opls_287', 'THR:opls_272', '911'],
['LYS:opls_287', 'TYR:opls_167', '112'],
['THR:opls_154', 'ARG:opls_300', '1271'],
['THR:opls_154', 'ARG:opls_303', '167'],
['THR:opls_154', 'ASN:opls_237', '14'],
['THR:opls_154', 'ASP:opls_272', '520'],
['THR:opls_154', 'GLU:opls_272', '533'],
['THR:opls_154', 'HIS:opls_511', '10'],
['THR:opls_154', 'THR:opls_154', '4'],
['THR:opls_154', 'THR:opls_272', '1089'],
['THR:opls_154', 'TYR:opls_167', '62'],
['THR:opls_238', 'GLU:opls_272', '3'],
['THR:opls_238', 'THR:opls_154', '29962'],
['THR:opls_238', 'THR:opls_272', '7774'],
['THR:opls_238', 'TYR:opls_167', '1'],
['TRP:opls_503', 'ARG:opls_300', '11'],
['TYR:opls_167', 'ARG:opls_300', '143'],
['TYR:opls_167', 'ARG:opls_303', '9'],
['TYR:opls_167', 'ASP:opls_272', '67'],
['TYR:opls_167', 'GLU:opls_272', '2647'],
['TYR:opls_167', 'THR:opls_154', '64'],
['TYR:opls_167', 'THR:opls_272', '74'],
['TYR:opls_167', 'TYR:opls_167', '130'],
['VAL:opls_238', 'THR:opls_154', '347']], dtype='<U21')
list(u.atoms[:100])
[<Atom 1: N of type opls_287 of resname ALA, resid 0 and segid seg_0_Protein_chain_A>, <Atom 2: H1 of type opls_290 of resname ALA, resid 0 and segid seg_0_Protein_chain_A>, <Atom 3: H2 of type opls_290 of resname ALA, resid 0 and segid seg_0_Protein_chain_A>, <Atom 4: H3 of type opls_290 of resname ALA, resid 0 and segid seg_0_Protein_chain_A>, <Atom 5: CA of type opls_293B of resname ALA, resid 0 and segid seg_0_Protein_chain_A>, <Atom 6: HA of type opls_140 of resname ALA, resid 0 and segid seg_0_Protein_chain_A>, <Atom 7: CB of type opls_135 of resname ALA, resid 0 and segid seg_0_Protein_chain_A>, <Atom 8: HB1 of type opls_140 of resname ALA, resid 0 and segid seg_0_Protein_chain_A>, <Atom 9: HB2 of type opls_140 of resname ALA, resid 0 and segid seg_0_Protein_chain_A>, <Atom 10: HB3 of type opls_140 of resname ALA, resid 0 and segid seg_0_Protein_chain_A>, <Atom 11: C of type opls_235 of resname ALA, resid 0 and segid seg_0_Protein_chain_A>, <Atom 12: O of type opls_236 of resname ALA, resid 0 and segid seg_0_Protein_chain_A>, <Atom 13: N of type opls_239 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 14: CA of type opls_246 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 15: HA of type opls_140 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 16: CB of type opls_136 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 17: HB1 of type opls_140 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 18: HB2 of type opls_140 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 19: CG of type opls_136 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 20: HG1 of type opls_140 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 21: HG2 of type opls_140 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 22: CD of type opls_245 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 23: HD1 of type opls_140 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 24: HD2 of type opls_140 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 25: C of type opls_235 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 26: O of type opls_236 of resname PRO, resid 1 and segid seg_0_Protein_chain_A>, <Atom 27: N of type opls_238 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 28: H of type opls_241 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 29: CA of type opls_224B of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 30: HA of type opls_140 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 31: CB of type opls_136 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 32: HB1 of type opls_140 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 33: HB2 of type opls_140 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 34: CG of type opls_137 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 35: HG of type opls_140 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 36: CD1 of type opls_135 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 37: HD11 of type opls_140 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 38: HD12 of type opls_140 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 39: HD13 of type opls_140 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 40: CD2 of type opls_135 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 41: HD21 of type opls_140 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 42: HD22 of type opls_140 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 43: HD23 of type opls_140 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 44: C of type opls_235 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 45: O of type opls_236 of resname LEU, resid 2 and segid seg_0_Protein_chain_A>, <Atom 46: N of type opls_238 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 47: H of type opls_241 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 48: CA of type opls_224B of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 49: HA of type opls_140 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 50: CB of type opls_137 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 51: HB of type opls_140 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 52: CG1 of type opls_135 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 53: HG11 of type opls_140 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 54: HG12 of type opls_140 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 55: HG13 of type opls_140 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 56: CG2 of type opls_135 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 57: HG21 of type opls_140 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 58: HG22 of type opls_140 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 59: HG23 of type opls_140 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 60: C of type opls_235 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 61: O of type opls_236 of resname VAL, resid 3 and segid seg_0_Protein_chain_A>, <Atom 62: N of type opls_238 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 63: H of type opls_241 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 64: CA of type opls_224B of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 65: HA of type opls_140 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 66: CB of type opls_136 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 67: HB1 of type opls_140 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 68: HB2 of type opls_140 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 69: CG of type opls_308 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 70: HG1 of type opls_140 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 71: HG2 of type opls_140 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 72: CD of type opls_307 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 73: HD1 of type opls_140 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 74: HD2 of type opls_140 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 75: NE of type opls_303 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 76: HE of type opls_304 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 77: CZ of type opls_302 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 78: NH1 of type opls_300 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 79: HH11 of type opls_301 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 80: HH12 of type opls_301 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 81: NH2 of type opls_300 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 82: HH21 of type opls_301 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 83: HH22 of type opls_301 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 84: C of type opls_235 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 85: O of type opls_236 of resname ARG, resid 4 and segid seg_0_Protein_chain_A>, <Atom 86: N of type opls_238 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 87: H of type opls_241 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 88: CA of type opls_224B of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 89: HA of type opls_140 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 90: CB of type opls_136 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 91: HB1 of type opls_140 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 92: HB2 of type opls_140 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 93: CG of type opls_500 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 94: CD1 of type opls_514 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 95: HD1 of type opls_146 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 96: CD2 of type opls_501 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 97: NE1 of type opls_503 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 98: HE1 of type opls_504 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 99: CE2 of type opls_502 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>, <Atom 100: CE3 of type opls_145 of resname TRP, resid 5 and segid seg_0_Protein_chain_A>]
hbonds.count_by_type()
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py:539: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations d = self.u.atoms[self.hbonds[:, 1].astype(np.int)] /data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py:543: DeprecationWarning: `np.str` is a deprecated alias for the builtin `str`. To silence this warning, use `str` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.str_` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations dtype=np.str).T
array([['ARG:opls_300', 'ARG:opls_300', '48314'],
['ARG:opls_300', 'ARG:opls_303', '48338'],
['ARG:opls_300', 'ASP:opls_272', '7179'],
['ARG:opls_300', 'GLN:opls_237', '4'],
['ARG:opls_300', 'GLU:opls_272', '4054'],
['ARG:opls_300', 'HIS:opls_511', '6'],
['ARG:opls_300', 'THR:opls_154', '2632'],
['ARG:opls_300', 'THR:opls_272', '1088'],
['ARG:opls_300', 'TRP:opls_503', '22'],
['ARG:opls_300', 'TYR:opls_167', '287'],
['ARG:opls_303', 'ARG:opls_300', '24773'],
['ARG:opls_303', 'GLN:opls_237', '3'],
['ARG:opls_303', 'GLU:opls_272', '1254'],
['ARG:opls_303', 'THR:opls_154', '199'],
['ARG:opls_303', 'THR:opls_272', '124'],
['ARG:opls_303', 'TYR:opls_167', '9'],
['ASN:opls_237', 'GLN:opls_237', '18'],
['ASN:opls_237', 'THR:opls_154', '28'],
['ASP:opls_238', 'ASP:opls_272', '40'],
['GLN:opls_237', 'ARG:opls_300', '4'],
['GLN:opls_237', 'ARG:opls_303', '6'],
['GLN:opls_237', 'ASN:opls_237', '18'],
['GLN:opls_238', 'GLN:opls_237', '2'],
['GLU:opls_238', 'THR:opls_154', '51'],
['GLY:opls_238', 'THR:opls_154', '2'],
['HIS:opls_503', 'ARG:opls_300', '3'],
['HIS:opls_503', 'ASP:opls_272', '3403'],
['HIS:opls_503', 'THR:opls_154', '10'],
['ILE:opls_238', 'THR:opls_154', '760'],
['LEU:opls_238', 'ASN:opls_237', '1'],
['LEU:opls_238', 'ASP:opls_272', '1'],
['LYS:opls_238', 'THR:opls_154', '4'],
['LYS:opls_287', 'GLU:opls_272', '1144'],
['LYS:opls_287', 'THR:opls_154', '62'],
['LYS:opls_287', 'THR:opls_272', '911'],
['LYS:opls_287', 'TYR:opls_167', '112'],
['THR:opls_154', 'ARG:opls_300', '1271'],
['THR:opls_154', 'ARG:opls_303', '167'],
['THR:opls_154', 'ASN:opls_237', '14'],
['THR:opls_154', 'ASP:opls_272', '520'],
['THR:opls_154', 'GLU:opls_272', '533'],
['THR:opls_154', 'HIS:opls_511', '10'],
['THR:opls_154', 'THR:opls_154', '4'],
['THR:opls_154', 'THR:opls_272', '1089'],
['THR:opls_154', 'TYR:opls_167', '62'],
['THR:opls_238', 'GLU:opls_272', '3'],
['THR:opls_238', 'THR:opls_154', '29962'],
['THR:opls_238', 'THR:opls_272', '7774'],
['THR:opls_238', 'TYR:opls_167', '1'],
['TRP:opls_503', 'ARG:opls_300', '11'],
['TYR:opls_167', 'ARG:opls_300', '143'],
['TYR:opls_167', 'ARG:opls_303', '9'],
['TYR:opls_167', 'ASP:opls_272', '67'],
['TYR:opls_167', 'GLU:opls_272', '2647'],
['TYR:opls_167', 'THR:opls_154', '64'],
['TYR:opls_167', 'THR:opls_272', '74'],
['TYR:opls_167', 'TYR:opls_167', '130'],
['VAL:opls_238', 'THR:opls_154', '347']], dtype='<U21')
data = hbonds.count_by_type()
data_sorted = sorted(data, key=lambda x: int(x[-1]), reverse=True)
data_sorted
[array(['ARG:opls_300', 'ARG:opls_303', '48338'], dtype='<U21'), array(['ARG:opls_300', 'ARG:opls_300', '48314'], dtype='<U21'), array(['THR:opls_238', 'THR:opls_154', '29962'], dtype='<U21'), array(['ARG:opls_303', 'ARG:opls_300', '24773'], dtype='<U21'), array(['THR:opls_238', 'THR:opls_272', '7774'], dtype='<U21'), array(['ARG:opls_300', 'ASP:opls_272', '7179'], dtype='<U21'), array(['ARG:opls_300', 'GLU:opls_272', '4054'], dtype='<U21'), array(['HIS:opls_503', 'ASP:opls_272', '3403'], dtype='<U21'), array(['TYR:opls_167', 'GLU:opls_272', '2647'], dtype='<U21'), array(['ARG:opls_300', 'THR:opls_154', '2632'], dtype='<U21'), array(['THR:opls_154', 'ARG:opls_300', '1271'], dtype='<U21'), array(['ARG:opls_303', 'GLU:opls_272', '1254'], dtype='<U21'), array(['LYS:opls_287', 'GLU:opls_272', '1144'], dtype='<U21'), array(['THR:opls_154', 'THR:opls_272', '1089'], dtype='<U21'), array(['ARG:opls_300', 'THR:opls_272', '1088'], dtype='<U21'), array(['LYS:opls_287', 'THR:opls_272', '911'], dtype='<U21'), array(['ILE:opls_238', 'THR:opls_154', '760'], dtype='<U21'), array(['THR:opls_154', 'GLU:opls_272', '533'], dtype='<U21'), array(['THR:opls_154', 'ASP:opls_272', '520'], dtype='<U21'), array(['VAL:opls_238', 'THR:opls_154', '347'], dtype='<U21'), array(['ARG:opls_300', 'TYR:opls_167', '287'], dtype='<U21'), array(['ARG:opls_303', 'THR:opls_154', '199'], dtype='<U21'), array(['THR:opls_154', 'ARG:opls_303', '167'], dtype='<U21'), array(['TYR:opls_167', 'ARG:opls_300', '143'], dtype='<U21'), array(['TYR:opls_167', 'TYR:opls_167', '130'], dtype='<U21'), array(['ARG:opls_303', 'THR:opls_272', '124'], dtype='<U21'), array(['LYS:opls_287', 'TYR:opls_167', '112'], dtype='<U21'), array(['TYR:opls_167', 'THR:opls_272', '74'], dtype='<U21'), array(['TYR:opls_167', 'ASP:opls_272', '67'], dtype='<U21'), array(['TYR:opls_167', 'THR:opls_154', '64'], dtype='<U21'), array(['LYS:opls_287', 'THR:opls_154', '62'], dtype='<U21'), array(['THR:opls_154', 'TYR:opls_167', '62'], dtype='<U21'), array(['GLU:opls_238', 'THR:opls_154', '51'], dtype='<U21'), array(['ASP:opls_238', 'ASP:opls_272', '40'], dtype='<U21'), array(['ASN:opls_237', 'THR:opls_154', '28'], dtype='<U21'), array(['ARG:opls_300', 'TRP:opls_503', '22'], dtype='<U21'), array(['ASN:opls_237', 'GLN:opls_237', '18'], dtype='<U21'), array(['GLN:opls_237', 'ASN:opls_237', '18'], dtype='<U21'), array(['THR:opls_154', 'ASN:opls_237', '14'], dtype='<U21'), array(['TRP:opls_503', 'ARG:opls_300', '11'], dtype='<U21'), array(['HIS:opls_503', 'THR:opls_154', '10'], dtype='<U21'), array(['THR:opls_154', 'HIS:opls_511', '10'], dtype='<U21'), array(['ARG:opls_303', 'TYR:opls_167', '9'], dtype='<U21'), array(['TYR:opls_167', 'ARG:opls_303', '9'], dtype='<U21'), array(['ARG:opls_300', 'HIS:opls_511', '6'], dtype='<U21'), array(['GLN:opls_237', 'ARG:opls_303', '6'], dtype='<U21'), array(['ARG:opls_300', 'GLN:opls_237', '4'], dtype='<U21'), array(['GLN:opls_237', 'ARG:opls_300', '4'], dtype='<U21'), array(['LYS:opls_238', 'THR:opls_154', '4'], dtype='<U21'), array(['THR:opls_154', 'THR:opls_154', '4'], dtype='<U21'), array(['ARG:opls_303', 'GLN:opls_237', '3'], dtype='<U21'), array(['HIS:opls_503', 'ARG:opls_300', '3'], dtype='<U21'), array(['THR:opls_238', 'GLU:opls_272', '3'], dtype='<U21'), array(['GLN:opls_238', 'GLN:opls_237', '2'], dtype='<U21'), array(['GLY:opls_238', 'THR:opls_154', '2'], dtype='<U21'), array(['LEU:opls_238', 'ASN:opls_237', '1'], dtype='<U21'), array(['LEU:opls_238', 'ASP:opls_272', '1'], dtype='<U21'), array(['THR:opls_238', 'TYR:opls_167', '1'], dtype='<U21')]
data_destilated = data_sorted[:16]
set_hdonors_acceptors = set([(x[0].split(':')[0], x[1].split(':')[0]) for x in data_destilated])
set_hdonors_acceptors
{('ARG', 'ARG'),
('ARG', 'ASP'),
('ARG', 'GLU'),
('ARG', 'THR'),
('HIS', 'ASP'),
('LYS', 'GLU'),
('LYS', 'THR'),
('THR', 'ARG'),
('THR', 'THR'),
('TYR', 'GLU')}
Analyse H-bonds for all the above residue pairs
from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis as HBA
data = {}
for k in range(len(list(set_hdonors_acceptors))):
# def hdnor-acceptor residues
res_hdonor = list(set_hdonors_acceptors)[k][0]
res_acceptor = list(set_hdonors_acceptors)[k][1]
# setup and execute hbond analysis
hbonds = HBA(universe=u,d_h_a_angle_cutoff=30)
hbonds.hydrogens_sel = hbonds.guess_hydrogens("resname "+res_hdonor)
hbonds.acceptors_sel = hbonds.guess_acceptors("resname "+res_acceptor)
hbonds.run()
# store data
key = res_hdonor+'-'+res_acceptor
data[key] = hbonds.count_by_time()
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/core/topologyattrs.py:2261: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray. np.array(sorted(unique_bonds)), 4) /data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/core/topologyobjects.py:600: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations guessed = np.asarray(guessed, dtype=np.bool) /data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py:521: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations counts[indices.astype(np.int)] = tmp_counts
Plot all data
fig,ax = plt.subplots(len(list(data.keys())),1,figsize=(12,3*len(list(data.keys()))),dpi=100)
for k in range(len(list(data.keys()))):
key = list(data.keys())[k]
ax[k].plot(data[key],color=np.random.rand(3),marker='o',label=key,lw=0.5,alpha=0.5)
ax[k].legend(loc='upper left')
plt.show()
set_hdonors = set([x[0].split(':')[0] for x in data_destilated])
set_acceptors = set([x[1].split(':')[0] for x in data_destilated])
list(set_hdonors.union(set_acceptors))
['HIS', 'TYR', 'ARG', 'ASP', 'LYS', 'GLU', 'THR']
hbonds = HBA(universe=u,d_h_a_angle_cutoff=30)
hbonds.hydrogens_sel = hbonds.guess_hydrogens("resname HIS TYR ARG ASP LYS GLU THR")
hbonds.acceptors_sel = hbonds.guess_acceptors("resname HIS TYR ARG ASP LYS GLU THR")
hbonds.run()
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/core/topologyattrs.py:2261: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray. np.array(sorted(unique_bonds)), 4) /data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/core/topologyobjects.py:600: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations guessed = np.asarray(guessed, dtype=np.bool)
<MDAnalysis.analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis at 0x7f765bc0e8b0>
data_leading_residues = hbonds.count_by_time()
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py:521: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations counts[indices.astype(np.int)] = tmp_counts
hbonds = HBA(universe=u,d_h_a_angle_cutoff=30)
hbonds.hydrogens_sel = hbonds.guess_hydrogens("protein")
hbonds.acceptors_sel = hbonds.guess_acceptors("protein")
hbonds.run()
<MDAnalysis.analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis at 0x7f765a97c7f0>
data_protein = hbonds.count_by_time()
Compare H-bonds from leading residues forming H-bonds wrt the whole protein
data_leading_residues
array([188, 181, 176, ..., 187, 184, 187])
data_protein
array([188, 182, 177, ..., 191, 188, 192])
fig,ax = plt.subplots(2,1,figsize=(12,6),sharex=True,sharey=True,dpi=100)
ax[0].plot(data_protein,color='blue',marker='o',label='H-bonds whole protein',lw=0.5,alpha=0.5)
ax[1].plot(data_leading_residues,color='green',marker='o',label='H-bonds leading residues',lw=0.5,alpha=0.5)
ax[0].legend()
ax[1].legend()
plt.show()
import MDAnalysis as mda
workdir = "data/cWza-Y373C_conformation1_0337/"
u = mda.Universe(workdir+'md_100ns.tpr', workdir+'md_100ns.xtc',in_memory=True)
from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis as HBA
hbonds = HBA(universe=u,d_h_a_angle_cutoff=30)
hbonds.hydrogens_sel = hbonds.guess_hydrogens("protein")
hbonds.acceptors_sel = hbonds.guess_acceptors("protein")
hbonds.run()
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/core/topologyattrs.py:2261: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray. np.array(sorted(unique_bonds)), 4) /data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/core/topologyobjects.py:600: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations guessed = np.asarray(guessed, dtype=np.bool)
<MDAnalysis.analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis at 0x7f7659ccb0d0>
hbonds.count_by_type()
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py:539: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations d = self.u.atoms[self.hbonds[:, 1].astype(np.int)] /data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py:543: DeprecationWarning: `np.str` is a deprecated alias for the builtin `str`. To silence this warning, use `str` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.str_` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations dtype=np.str).T
array([['ARG:opls_300', 'ARG:opls_300', '48320'],
['ARG:opls_300', 'ARG:opls_303', '48310'],
['ARG:opls_300', 'GLN:opls_237', '8'],
['ARG:opls_300', 'GLU:opls_272', '21582'],
['ARG:opls_300', 'SER:opls_154', '10'],
['ARG:opls_300', 'THR:opls_154', '247'],
['ARG:opls_300', 'THR:opls_272', '2378'],
['ARG:opls_300', 'TRP:opls_503', '18'],
['ARG:opls_303', 'ARG:opls_300', '24767'],
['ARG:opls_303', 'GLN:opls_237', '1'],
['ARG:opls_303', 'GLU:opls_272', '4138'],
['ARG:opls_303', 'THR:opls_154', '243'],
['ASN:opls_237', 'GLN:opls_237', '10'],
['ASN:opls_237', 'SER:opls_154', '764'],
['ASN:opls_237', 'THR:opls_154', '1421'],
['ASN:opls_238', 'ASN:opls_237', '2'],
['ASP:opls_238', 'ASP:opls_272', '172'],
['GLN:opls_237', 'ARG:opls_300', '8'],
['GLN:opls_237', 'ARG:opls_303', '2'],
['GLN:opls_237', 'ASN:opls_237', '12'],
['GLN:opls_237', 'SER:opls_154', '614'],
['GLN:opls_238', 'GLN:opls_237', '155'],
['GLN:opls_238', 'SER:opls_154', '116'],
['GLU:opls_238', 'ARG:opls_300', '1'],
['GLU:opls_238', 'THR:opls_154', '123'],
['GLY:opls_238', 'ASP:opls_272', '15'],
['GLY:opls_238', 'THR:opls_154', '1'],
['HIS:opls_503', 'ARG:opls_300', '1'],
['HIS:opls_503', 'ASP:opls_272', '2091'],
['HIS:opls_503', 'THR:opls_154', '54'],
['ILE:opls_238', 'THR:opls_154', '534'],
['LEU:opls_238', 'ASP:opls_272', '4'],
['LYS:opls_287', 'GLU:opls_272', '102'],
['LYS:opls_287', 'THR:opls_154', '105'],
['LYS:opls_287', 'THR:opls_272', '5746'],
['SER:opls_154', 'ARG:opls_300', '2'],
['SER:opls_154', 'ASN:opls_237', '382'],
['SER:opls_154', 'GLN:opls_237', '280'],
['SER:opls_154', 'THR:opls_154', '40'],
['SER:opls_238', 'SER:opls_154', '6219'],
['THR:opls_154', 'ARG:opls_300', '117'],
['THR:opls_154', 'ARG:opls_303', '212'],
['THR:opls_154', 'ASN:opls_237', '645'],
['THR:opls_154', 'ASP:opls_272', '556'],
['THR:opls_154', 'GLU:opls_272', '596'],
['THR:opls_154', 'HIS:opls_511', '2'],
['THR:opls_154', 'SER:opls_154', '42'],
['THR:opls_154', 'THR:opls_154', '2417'],
['THR:opls_154', 'THR:opls_272', '1417'],
['THR:opls_238', 'THR:opls_154', '31343'],
['THR:opls_238', 'THR:opls_272', '7626'],
['TRP:opls_503', 'ARG:opls_300', '9'],
['VAL:opls_238', 'THR:opls_154', '275']], dtype='<U21')
data = hbonds.count_by_type()
data_sorted = sorted(data, key=lambda x: int(x[-1]), reverse=True)
data_sorted
[array(['ARG:opls_300', 'ARG:opls_300', '48320'], dtype='<U21'), array(['ARG:opls_300', 'ARG:opls_303', '48310'], dtype='<U21'), array(['THR:opls_238', 'THR:opls_154', '31343'], dtype='<U21'), array(['ARG:opls_303', 'ARG:opls_300', '24767'], dtype='<U21'), array(['ARG:opls_300', 'GLU:opls_272', '21582'], dtype='<U21'), array(['THR:opls_238', 'THR:opls_272', '7626'], dtype='<U21'), array(['SER:opls_238', 'SER:opls_154', '6219'], dtype='<U21'), array(['LYS:opls_287', 'THR:opls_272', '5746'], dtype='<U21'), array(['ARG:opls_303', 'GLU:opls_272', '4138'], dtype='<U21'), array(['THR:opls_154', 'THR:opls_154', '2417'], dtype='<U21'), array(['ARG:opls_300', 'THR:opls_272', '2378'], dtype='<U21'), array(['HIS:opls_503', 'ASP:opls_272', '2091'], dtype='<U21'), array(['ASN:opls_237', 'THR:opls_154', '1421'], dtype='<U21'), array(['THR:opls_154', 'THR:opls_272', '1417'], dtype='<U21'), array(['ASN:opls_237', 'SER:opls_154', '764'], dtype='<U21'), array(['THR:opls_154', 'ASN:opls_237', '645'], dtype='<U21'), array(['GLN:opls_237', 'SER:opls_154', '614'], dtype='<U21'), array(['THR:opls_154', 'GLU:opls_272', '596'], dtype='<U21'), array(['THR:opls_154', 'ASP:opls_272', '556'], dtype='<U21'), array(['ILE:opls_238', 'THR:opls_154', '534'], dtype='<U21'), array(['SER:opls_154', 'ASN:opls_237', '382'], dtype='<U21'), array(['SER:opls_154', 'GLN:opls_237', '280'], dtype='<U21'), array(['VAL:opls_238', 'THR:opls_154', '275'], dtype='<U21'), array(['ARG:opls_300', 'THR:opls_154', '247'], dtype='<U21'), array(['ARG:opls_303', 'THR:opls_154', '243'], dtype='<U21'), array(['THR:opls_154', 'ARG:opls_303', '212'], dtype='<U21'), array(['ASP:opls_238', 'ASP:opls_272', '172'], dtype='<U21'), array(['GLN:opls_238', 'GLN:opls_237', '155'], dtype='<U21'), array(['GLU:opls_238', 'THR:opls_154', '123'], dtype='<U21'), array(['THR:opls_154', 'ARG:opls_300', '117'], dtype='<U21'), array(['GLN:opls_238', 'SER:opls_154', '116'], dtype='<U21'), array(['LYS:opls_287', 'THR:opls_154', '105'], dtype='<U21'), array(['LYS:opls_287', 'GLU:opls_272', '102'], dtype='<U21'), array(['HIS:opls_503', 'THR:opls_154', '54'], dtype='<U21'), array(['THR:opls_154', 'SER:opls_154', '42'], dtype='<U21'), array(['SER:opls_154', 'THR:opls_154', '40'], dtype='<U21'), array(['ARG:opls_300', 'TRP:opls_503', '18'], dtype='<U21'), array(['GLY:opls_238', 'ASP:opls_272', '15'], dtype='<U21'), array(['GLN:opls_237', 'ASN:opls_237', '12'], dtype='<U21'), array(['ARG:opls_300', 'SER:opls_154', '10'], dtype='<U21'), array(['ASN:opls_237', 'GLN:opls_237', '10'], dtype='<U21'), array(['TRP:opls_503', 'ARG:opls_300', '9'], dtype='<U21'), array(['ARG:opls_300', 'GLN:opls_237', '8'], dtype='<U21'), array(['GLN:opls_237', 'ARG:opls_300', '8'], dtype='<U21'), array(['LEU:opls_238', 'ASP:opls_272', '4'], dtype='<U21'), array(['ASN:opls_238', 'ASN:opls_237', '2'], dtype='<U21'), array(['GLN:opls_237', 'ARG:opls_303', '2'], dtype='<U21'), array(['SER:opls_154', 'ARG:opls_300', '2'], dtype='<U21'), array(['THR:opls_154', 'HIS:opls_511', '2'], dtype='<U21'), array(['ARG:opls_303', 'GLN:opls_237', '1'], dtype='<U21'), array(['GLU:opls_238', 'ARG:opls_300', '1'], dtype='<U21'), array(['GLY:opls_238', 'THR:opls_154', '1'], dtype='<U21'), array(['HIS:opls_503', 'ARG:opls_300', '1'], dtype='<U21')]
import pandas as pd
df = pd.DataFrame(data_sorted, columns=['hdonor','acceptor','n_hbonds'])
df
| hdonor | acceptor | n_hbonds | |
|---|---|---|---|
| 0 | ARG:opls_300 | ARG:opls_300 | 48320 |
| 1 | ARG:opls_300 | ARG:opls_303 | 48310 |
| 2 | THR:opls_238 | THR:opls_154 | 31343 |
| 3 | ARG:opls_303 | ARG:opls_300 | 24767 |
| 4 | ARG:opls_300 | GLU:opls_272 | 21582 |
| 5 | THR:opls_238 | THR:opls_272 | 7626 |
| 6 | SER:opls_238 | SER:opls_154 | 6219 |
| 7 | LYS:opls_287 | THR:opls_272 | 5746 |
| 8 | ARG:opls_303 | GLU:opls_272 | 4138 |
| 9 | THR:opls_154 | THR:opls_154 | 2417 |
| 10 | ARG:opls_300 | THR:opls_272 | 2378 |
| 11 | HIS:opls_503 | ASP:opls_272 | 2091 |
| 12 | ASN:opls_237 | THR:opls_154 | 1421 |
| 13 | THR:opls_154 | THR:opls_272 | 1417 |
| 14 | ASN:opls_237 | SER:opls_154 | 764 |
| 15 | THR:opls_154 | ASN:opls_237 | 645 |
| 16 | GLN:opls_237 | SER:opls_154 | 614 |
| 17 | THR:opls_154 | GLU:opls_272 | 596 |
| 18 | THR:opls_154 | ASP:opls_272 | 556 |
| 19 | ILE:opls_238 | THR:opls_154 | 534 |
| 20 | SER:opls_154 | ASN:opls_237 | 382 |
| 21 | SER:opls_154 | GLN:opls_237 | 280 |
| 22 | VAL:opls_238 | THR:opls_154 | 275 |
| 23 | ARG:opls_300 | THR:opls_154 | 247 |
| 24 | ARG:opls_303 | THR:opls_154 | 243 |
| 25 | THR:opls_154 | ARG:opls_303 | 212 |
| 26 | ASP:opls_238 | ASP:opls_272 | 172 |
| 27 | GLN:opls_238 | GLN:opls_237 | 155 |
| 28 | GLU:opls_238 | THR:opls_154 | 123 |
| 29 | THR:opls_154 | ARG:opls_300 | 117 |
| 30 | GLN:opls_238 | SER:opls_154 | 116 |
| 31 | LYS:opls_287 | THR:opls_154 | 105 |
| 32 | LYS:opls_287 | GLU:opls_272 | 102 |
| 33 | HIS:opls_503 | THR:opls_154 | 54 |
| 34 | THR:opls_154 | SER:opls_154 | 42 |
| 35 | SER:opls_154 | THR:opls_154 | 40 |
| 36 | ARG:opls_300 | TRP:opls_503 | 18 |
| 37 | GLY:opls_238 | ASP:opls_272 | 15 |
| 38 | GLN:opls_237 | ASN:opls_237 | 12 |
| 39 | ARG:opls_300 | SER:opls_154 | 10 |
| 40 | ASN:opls_237 | GLN:opls_237 | 10 |
| 41 | TRP:opls_503 | ARG:opls_300 | 9 |
| 42 | ARG:opls_300 | GLN:opls_237 | 8 |
| 43 | GLN:opls_237 | ARG:opls_300 | 8 |
| 44 | LEU:opls_238 | ASP:opls_272 | 4 |
| 45 | ASN:opls_238 | ASN:opls_237 | 2 |
| 46 | GLN:opls_237 | ARG:opls_303 | 2 |
| 47 | SER:opls_154 | ARG:opls_300 | 2 |
| 48 | THR:opls_154 | HIS:opls_511 | 2 |
| 49 | ARG:opls_303 | GLN:opls_237 | 1 |
| 50 | GLU:opls_238 | ARG:opls_300 | 1 |
| 51 | GLY:opls_238 | THR:opls_154 | 1 |
| 52 | HIS:opls_503 | ARG:opls_300 | 1 |
data_destilated = data_sorted[:16]
set_hdonors_acceptors = set([(x[0].split(':')[0], x[1].split(':')[0]) for x in data_destilated])
set_hdonors_acceptors
{('ARG', 'ARG'),
('ARG', 'GLU'),
('ARG', 'THR'),
('ASN', 'SER'),
('ASN', 'THR'),
('HIS', 'ASP'),
('LYS', 'THR'),
('SER', 'SER'),
('THR', 'ASN'),
('THR', 'THR')}
Analyse H-bonds for all the above residue pairs
from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis as HBA
data = {}
for k in range(len(list(set_hdonors_acceptors))):
# def hdnor-acceptor residues
res_hdonor = list(set_hdonors_acceptors)[k][0]
res_acceptor = list(set_hdonors_acceptors)[k][1]
# setup and execute hbond analysis
hbonds = HBA(universe=u,d_h_a_angle_cutoff=30)
hbonds.hydrogens_sel = hbonds.guess_hydrogens("resname "+res_hdonor)
hbonds.acceptors_sel = hbonds.guess_acceptors("resname "+res_acceptor)
hbonds.run()
# store data
key = res_hdonor+'-'+res_acceptor
data[key] = hbonds.count_by_time()
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/core/topologyattrs.py:2261: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray. np.array(sorted(unique_bonds)), 4) /data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/core/topologyobjects.py:600: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations guessed = np.asarray(guessed, dtype=np.bool) /data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py:521: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations counts[indices.astype(np.int)] = tmp_counts
Plot all data
fig,ax = plt.subplots(len(list(data.keys())),1,figsize=(12,3*len(list(data.keys()))),dpi=100)
for k in range(len(list(data.keys()))):
key = list(data.keys())[k]
ax[k].plot(data[key],color=np.random.rand(3),marker='o',label=key,lw=0.5,alpha=0.5)
ax[k].legend(loc='upper left')
plt.show()
set_hdonors = set([x[0].split(':')[0] for x in data_destilated])
set_acceptors = set([x[1].split(':')[0] for x in data_destilated])
' '.join(list(set_hdonors.union(set_acceptors)))
'HIS SER ARG ASN ASP LYS GLU THR'
hbonds = HBA(universe=u,d_h_a_angle_cutoff=30)
hbonds.hydrogens_sel = hbonds.guess_hydrogens("resname HIS SER ARG ASN ASP LYS GLU THR")
hbonds.acceptors_sel = hbonds.guess_acceptors("resname HIS SER ARG ASN ASP LYS GLU THR")
hbonds.run()
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/core/topologyattrs.py:2261: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray. np.array(sorted(unique_bonds)), 4) /data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/core/topologyobjects.py:600: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations guessed = np.asarray(guessed, dtype=np.bool)
<MDAnalysis.analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis at 0x7f7667154af0>
data_leading_residues = hbonds.count_by_time()
/data/dragon000/sanjuan/anaconda3/lib/python3.8/site-packages/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py:521: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations counts[indices.astype(np.int)] = tmp_counts
hbonds = HBA(universe=u,d_h_a_angle_cutoff=30)
hbonds.hydrogens_sel = hbonds.guess_hydrogens("protein")
hbonds.acceptors_sel = hbonds.guess_acceptors("protein")
hbonds.run()
<MDAnalysis.analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis at 0x7f765957d5e0>
data_protein = hbonds.count_by_time()
Compare H-bonds from leading residues forming H-bonds wrt the whole protein
data_leading_residues
array([195, 199, 199, ..., 203, 211, 212])
data_protein
array([198, 201, 200, ..., 205, 213, 213])
fig,ax = plt.subplots(2,1,figsize=(12,6),sharex=True,sharey=True,dpi=100)
ax[0].plot(data_protein,color='blue',marker='o',label='H-bonds whole protein',lw=0.5,alpha=0.5)
ax[1].plot(data_leading_residues,color='green',marker='o',label='H-bonds leading residues',lw=0.5,alpha=0.5)
ax[0].legend()
ax[1].legend()
plt.show()
data_folder = "data/md_traj_analysis/"
clean_line = lambda line: list(map(float, line.strip().split()))
exclude_regex = lambda line: not re.search("#|@",line)
mutant_names = ['cWza', 'cWza-K375C', 'cWza-S355C', 'cWza-Y373C']
sns.set_style("darkgrid")
fig, ax = plt.subplots(2,2,figsize=(18,10),dpi=200, sharex=True, sharey=True)
Axes = {
'cWza': ax[0][0],
'cWza-K375C': ax[0][1],
'cWza-S355C': ax[1][0],
'cWza-Y373C': ax[1][1]
}
Conformations = {
'cWza': [0, 1],
'cWza-K375C': [0, 1],
'cWza-S355C': [0, 1],
'cWza-Y373C': [1]
}
Colors = {0:"blue", 1:'green'}
Sequences = {
'cWza': 'APLVRWNRVISQLVPTITGVHDLTETVRYIKT',
'cWza-K375C' : 'APLVRWNRVISQLVPTITGVHDLTETVRYICT',
'cWza-S355C': 'APLVRWNRVICQLVPTITGVHDLTETVRYIKT',
'cWza-Y373C': 'APLVRWNRVISQLVPTITGVHDLTETVRCIKT'
}
for mutant in mutant_names:
ax = Axes[mutant]
for g in Conformations[mutant]:
files=glob.glob(data_folder+mutant+'_'+'conformation'+str(g)+'_'+'*rmsf_C-alpha.xvg')
for path in files:
data = [clean_line(l) for l in open(
path, 'r').readlines() if exclude_regex(l)]
data = np.array(data)
ax.scatter(*data.T, color=Colors[g], lw=0.25, alpha=0.15)
ax.set_xticks(range(1,33))
for resn in range(32):
ax.text(resn+0.5, 0.02,Sequences[mutant][resn],fontsize=15)
ax.set_title(mutant, fontsize=20)
ax.set_xlabel("Residue number", fontsize=25)
ax.set_ylabel("$C_\\alpha$-RMSF", fontsize=25)
ax.set_xlim(0, 33)
ax.set_ylim(0, 1)
fig.tight_layout()
plt.show()
sns.set_style("darkgrid")
fig, ax = plt.subplots(2,2,figsize=(18,10),dpi=200, sharex=True, sharey=True)
Axes = {
'cWza': ax[0][0],
'cWza-K375C': ax[0][1],
'cWza-S355C': ax[1][0],
'cWza-Y373C': ax[1][1]
}
Conformations = {
'cWza': [0, 1],
'cWza-K375C': [0, 1],
'cWza-S355C': [0, 1],
'cWza-Y373C': [1]
}
Colors = {0:"blue", 1:'green'}
Sequences = {
'cWza': 'APLVRWNRVISQLVPTITGVHDLTETVRYIKT',
'cWza-K375C' : 'APLVRWNRVISQLVPTITGVHDLTETVRYICT',
'cWza-S355C': 'APLVRWNRVICQLVPTITGVHDLTETVRYIKT',
'cWza-Y373C': 'APLVRWNRVISQLVPTITGVHDLTETVRCIKT'
}
test_data = {}
for mutant in mutant_names:
ax = Axes[mutant]
test_data['residue'] = []
test_data['rmsf'] = []
for g in Conformations[mutant]:
files=glob.glob(data_folder+mutant+'_'+'conformation'+str(g)+'_'+'*rmsf_C-alpha.xvg')
for path in files:
data = [clean_line(l) for l in open(
path, 'r').readlines() if exclude_regex(l)]
data = np.array(data).T
X = data[0]; Y = data[1]
test_data['residue'] = test_data['residue'] + list(X)
test_data['rmsf'] = test_data['rmsf'] + list(Y)
df = pd.DataFrame(test_data)
sns.lineplot(x='residue',y='rmsf',data = df,color=Colors[g], ax=ax)
ax.set_xticks(range(1,33))
for resn in range(32):
ax.text(resn+0.5, 0.02,Sequences[mutant][resn],fontsize=15)
ax.set_title(mutant, fontsize=20)
ax.set_xlabel("Residue number", fontsize=25)
ax.set_ylabel("$C_\\alpha$-RMSF", fontsize=25)
ax.set_xlim(0, 33)
ax.set_ylim(0, 1)
fig.tight_layout()
plt.show()
import os
import sys
import json
import pandas
import seaborn
import operator
import concurrent.futures
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import matplotlib.pyplot as plt
from setup_db_metrics import Base, Tags, Pore_Dimensions, Radii_of_Gyration
dbfile = 'data/conf_metrics_Last10ns.db'
# Create engine and bind it to current session
engine = create_engine('sqlite:///'+dbfile)
Base.metadata.bind = engine
DBSession = sessionmaker(bind=engine)
session = DBSession()
TAGS = [
['cWza', 'conformation0'],
['cWza', 'conformation1'],
['cWza-K375C', 'conformation0'],
['cWza-K375C', 'conformation1'],
['cWza-S355C', 'conformation0'],
['cWza-S355C', 'conformation1'],
['cWza-Y373C', 'conformation1']
]
TAGS = [json.dumps(tag) for tag in TAGS]
DFS = {}
for tag in TAGS:
mutant, conformation = json.loads(tag)
models_ids = session.query(Tags.id).filter_by(mutant=mutant, group=conformation).all()
models_ids = [x[0] for x in models_ids]
print(len(models_ids))
data = []
for id in models_ids:
try:
x = session.query(Pore_Dimensions.pore_length).filter_by(id=id)[0][0]
y = session.query(Pore_Dimensions.pore_Rmin).filter_by(id=id)[0][0]
data.append([x,y])
except:
print("No data for id", id)
DFS[tag] = pandas.DataFrame(data, columns=['length','Rmin'])
839 305 166 152 405 632 582
# models_ids = session.query(Tags.id).filter_by(mutant='cWza',frame='Protein_975').all()
pdb_name0 = 'refined1_0001_INPUT_0047_ignorechain'
models_ids = session.query(Tags.id).filter_by(mutant='cWza',group='conformation1', pdb_name=pdb_name0).all()
models_ids = [x[0] for x in models_ids]
print(len(models_ids))
testdata = []
for id in models_ids:
x = session.query(Pore_Dimensions.pore_length).filter_by(id=id)[0][0]
y = session.query(Pore_Dimensions.pore_Rmin).filter_by(id=id)[0][0]
group = session.query(Tags.group).filter_by(id=id)[0][0]
pdb_name = session.query(Tags.pdb_name).filter_by(id=id)[0][0]
mutant = session.query(Tags.mutant).filter_by(id=id)[0][0]
frame = session.query(Tags.frame).filter_by(id=id)[0][0]
testdata.append(frame)
print(id,[x,y], group, mutant, frame, pdb_name == pdb_name0)
56 630 [39.1175, 8.13735] conformation1 cWza Protein_901 True 631 [39.008750000000006, 8.33563] conformation1 cWza Protein_905 True 632 [39.2975, 7.8274] conformation1 cWza Protein_906 True 633 [39.49125, 7.22182] conformation1 cWza Protein_913 True 634 [39.1775, 7.69792] conformation1 cWza Protein_902 True 635 [39.3875, 8.029] conformation1 cWza Protein_904 True 636 [39.0325, 7.82308] conformation1 cWza Protein_917 True 637 [39.254999999999995, 7.68256] conformation1 cWza Protein_908 True 638 [39.0875, 7.74078] conformation1 cWza Protein_910 True 639 [38.760000000000005, 7.24948] conformation1 cWza Protein_918 True 640 [39.06125, 7.97291] conformation1 cWza Protein_921 True 641 [38.58624999999999, 7.5526] conformation1 cWza Protein_930 True 642 [38.08, 7.67304] conformation1 cWza Protein_920 True 643 [38.338750000000005, 8.16492] conformation1 cWza Protein_923 True 644 [38.4775, 8.25895] conformation1 cWza Protein_925 True 645 [38.53875, 8.17393] conformation1 cWza Protein_929 True 646 [38.43, 7.9226] conformation1 cWza Protein_932 True 647 [38.93124999999999, 7.94455] conformation1 cWza Protein_931 True 648 [39.16375, 7.90102] conformation1 cWza Protein_934 True 649 [39.227500000000006, 7.89845] conformation1 cWza Protein_933 True 650 [38.4725, 7.8009] conformation1 cWza Protein_936 True 651 [38.2025, 7.38247] conformation1 cWza Protein_937 True 652 [38.62625, 7.72747] conformation1 cWza Protein_939 True 653 [38.6775, 7.95015] conformation1 cWza Protein_938 True 654 [38.9725, 8.02464] conformation1 cWza Protein_946 True 655 [38.67375, 7.75034] conformation1 cWza Protein_942 True 656 [38.95375, 7.73707] conformation1 cWza Protein_943 True 657 [38.809999999999995, 8.31319] conformation1 cWza Protein_950 True 658 [38.615, 7.89702] conformation1 cWza Protein_960 True 659 [38.62625, 7.81738] conformation1 cWza Protein_947 True 660 [38.89875, 7.81496] conformation1 cWza Protein_955 True 661 [39.24125000000001, 8.0052] conformation1 cWza Protein_954 True 662 [38.1075, 8.00453] conformation1 cWza Protein_961 True 663 [38.14, 8.2242] conformation1 cWza Protein_966 True 664 [38.53, 8.01649] conformation1 cWza Protein_962 True 665 [38.50125, 8.16717] conformation1 cWza Protein_964 True 666 [38.445, 7.94989] conformation1 cWza Protein_965 True 667 [38.165, 7.7303] conformation1 cWza Protein_968 True 668 [39.190000000000005, 8.11179] conformation1 cWza Protein_973 True 669 [38.027499999999996, 7.85591] conformation1 cWza Protein_969 True 670 [39.022499999999994, 7.92202] conformation1 cWza Protein_972 True 671 [38.50749999999999, 7.83934] conformation1 cWza Protein_971 True 672 [39.371249999999996, 7.84736] conformation1 cWza Protein_976 True 673 [38.95125, 8.04932] conformation1 cWza Protein_978 True 674 [38.287499999999994, 7.95133] conformation1 cWza Protein_991 True 675 [38.52, 8.32921] conformation1 cWza Protein_980 True 676 [39.16375000000001, 7.86047] conformation1 cWza Protein_992 True 677 [38.067499999999995, 7.9676] conformation1 cWza Protein_981 True 678 [38.7125, 7.76343] conformation1 cWza Protein_982 True 679 [38.93375, 7.91463] conformation1 cWza Protein_985 True 680 [39.19, 8.3122] conformation1 cWza Protein_983 True 681 [38.553749999999994, 7.88428] conformation1 cWza Protein_987 True 682 [38.535, 8.12002] conformation1 cWza Protein_994 True 683 [38.785, 7.99696] conformation1 cWza Protein_997 True 684 [38.981249999999996, 8.24051] conformation1 cWza Protein_999 True 685 [38.70875, 7.82704] conformation1 cWza Protein_998 True
testdata.sort()
len(testdata)
56
x = [
839,
305,
166,
152,
405,
632,
582]
sum(x)
3081
MUTANTS = ['cWza','cWza-K375C','cWza-S355C','cWza-Y373C']
CONFORMATIONS = {
'cWza':['conformation0', 'conformation1'],
'cWza-K375C':['conformation0', 'conformation1'],
'cWza-S355C':['conformation0', 'conformation1'],
'cWza-Y373C':['conformation1'],
}
fig, ax = plt.subplots(2,2,figsize=(14,12),sharex=True, sharey=True,dpi=200)
seaborn.set_style('darkgrid')
axes = {
'cWza':ax[0,0],
'cWza-K375C':ax[0,1],
'cWza-S355C':ax[1,0],
'cWza-Y373C':ax[1,1]
}
COLOURS = {
'conformation0':'blue',
'conformation1':'green'
}
for tag in TAGS:
mutant,conformation = json.loads(tag)
seaborn.scatterplot(
x='length',
y='Rmin',
data=DFS[tag],
color=COLOURS[conformation],
ax=axes[mutant]
)
###############################################
# Customise plot
###############################################
axes[mutant].set_title("MD "+mutant, fontsize=20)
axes[mutant].tick_params(axis='both',direction='in',labelsize=20)
axes[mutant].set_xlabel("$L$ ($\AA$)",fontsize=25)
axes[mutant].set_ylabel("$R_{min}$ ($\AA$)",fontsize=25)
axes[mutant].set_xlim(30,50)
axes[mutant].set_ylim(2,12)
plt.tight_layout()
plt.show()
for n in `seq -f "%04g" 1 1000`; do line=$(grep "TAG" refined1_0001_INPUT_${n}_ignorechain.hole_dat); echo $n $line; done > cWza_hole_1-1000.raw_dat
Script to add chain numbers to protein PDBs extracted from md_100ns.gro files
from pymol import cmd
import sys
pdb_in = sys.argv[1] # Input PDB path
pdb_out = sys.argv[2] # Output PDB path
cmd.load(pdb_in,"MyProtein")
N_atoms = cmd.count_atoms("MyProtein")
Chains = ['A', 'B','C','D','E','F','G','H']
atoms_per_chain = N_atoms/len(Chains)
for k in range(len(Chains)):
atom_number_intial = int( 1 + k*atoms_per_chain )
atom_number_final = int( (k + 1)*atoms_per_chain )
selection = "id "+ str(atom_number_intial) + ":" + str(atom_number_final)
expression = "chain='"+Chains[k]+"'"
cmd.alter(selection, expression)
cmd.set("retain_order",1)
cmd.save(pdb_out ,"MyProtein")
Commands to fix all chains in all models using a Python script
for pdb in `ls md_selected_models/*md_100ns.pdb`; do
pymol -qc ../add_chain_numbers.py -- $pdb ${pdb%.pdb}_FIXED-CHAINS.pdb;
done
Simple Python script to overlay docking, MD, and wild-type Wza-D4 models
from pymol import cmd
import sys
pdb1 = sys.argv[1] # PDB docking model
pdb2 = sys.argv[2] # PDB md_100ns model
wza_pdb = "data/md_selected_models/wzaD4-consensus-full.pdb"
cmd.load(pdb1,"docking")
cmd.load(pdb2,"md_100ns")
cmd.load(wza_pdb,"wza")
cmd.align("docking", "wza")
cmd.align("md_100ns", "wza")
cmd.hide("spheres","all")
cmd.show("cartoon","all")
cmd.show("sticks",'resname tyr+cys')
cmd.set("orthoscopic",1)
cmd.bg_color('white')